



// Build the "all time" dataset.
// Loads the step-4 output, removes weekend rows, keeps only the (sa, date)
// pairs that pass the screens computed on the hour 14-17 window, sets
// implausible temperatures to missing, and saves the result.

use "stata data/end_of_step_4_data", clear

capture drop naics      // tolerate inputs that do not carry naics
gduplicates drop        // remove exact duplicate rows

// dow() is 0 for Sunday and 6 for Saturday: keep weekdays only
drop if inlist(dow(date), 0, 6)


preserve

    // the screens below look only at hours 14-17 and bin values -35 through 27
    // (weekends are already gone at this point)
    keep if inrange(hour, 14, 17) & inrange(bin, -35, 27)

    tempvar n_rows is_zero

    // balance screen: an sa must have exactly 880 rows in this window
    sort sa date hour
    by sa: gen long `n_rows' = _N
    keep if `n_rows' == 880

    // zero-reading screen: count the rows with kwh exactly 0 for each sa
    gen byte `is_zero' = (kwh == 0)
    gegen zero_kwh_sum = sum(`is_zero'), by(sa)
    // NOTE(review): sum_kwh is not used by any filter in this section
    gegen double sum_kwh = sum(kwh), by(sa)

    // threshold is loose on purpose: do not lose customers who actually use
    // a lot but have a few zero readings
    keep if zero_kwh_sum < 200

    // size screen: the largest reading of an sa must exceed 1
    gegen max_kwh = max(kwh), by(sa)
    keep if max_kwh > 1

    // one row per surviving (sa, date) pair
    keep sa date
    gduplicates drop
    save "stata data/the_sa_ids_we_want", replace

restore

// keep only the rows of the full data whose (sa, date) passed the screens
merge m:1 sa date using "stata data/the_sa_ids_we_want", keep(match) nogenerate


// some temperatures show up as 327 by mistake; set anything above 130 to missing
replace temp_f = . if temp_f > 130


save "stata data/A1 Standard regression with all time", replace



// Find the roughly 77 customers who opted out during the summer.
// In the current dataset they show up as having no PDP, although they
// actually had PDP for part of the summer. The list saved here is merged in
// afterwards so that they count as PDP for a month or two.

use "stata data/PDP extract 2015_0713 full data", clear   // from step 1

// status is not "Active", and pdp_end falls between 1 Jun 2015 and 31 Oct 2015
keep if status_name != "Active" & inrange(pdp_end, mdy(6,1,2015), mdy(10,31,2015))

keep sa pdp_end
save "stata data/mid summer pdp enders", replace



// Attach the mid-summer PDP correction, the struct winner info and the NAICS
// codes to the all-time dataset, then save the file used by the size screen.

use "stata data/A1 Standard regression with all time", clear


capture drop _merge

// The file loaded above is already keyed on sa (it was merged on sa and date
// before being saved), so no rename is needed; "rename sa_new sa" could only
// stop the script with an error because sa already exists.

// People who changed over during the summer: give them PDP up to their end date.
// keep(master match) discards list entries that have no rows in the master
// data; without it they are appended as observations with missing date, hour
// and kwh and can survive into the saved datasets.
merge m:1 sa using "stata data/mid summer pdp enders", keep(master match)
replace pdp = 1 if date < pdp_end & year == 2015 & _merge == 3
drop _merge pdp_end

save "temp/temp65", replace

// use takes the clear option; "replace" is not a valid option for use
use "temp/temp65", clear

merge m:1 sa using "stata data/struct winner info with total kwh" //From step 5

keep if _merge == 3
drop _merge



// forward slash as path separator, like every other path in this file
// (a backslash only works on Windows)
merge m:1 sa using "stata data/TOU_demographic naics only"
drop if _merge == 2
// NOTE(review): _merge from the NAICS merge is still part of the saved file




save "stata data/A1 standard before size drop", replace

// Apply the size and industry screens, then write the regression datasets.

use "stata data/A1 standard before size drop", clear

// size window: total strictly above 4000 and at most 50000
// (rows with missing total fail the upper bound and are dropped)
keep if total > 4000 & total <= 50000

// excluded industries: 22 = utilities, 51 = cell towers (very odd consumption patterns)
drop if inlist(naics_most_2, 22, 51)

// zero readings are removed; missing kwh is left in place
drop if kwh == 0

// a missing enrollment flag is treated as "not optionally enrolled"
replace optionally_enrolled = 0 if optionally_enrolled == .


// every hour of the day
save "stata data/A1 Standard regression dataset ALL hours", replace

// hours 14-17 only; written twice, once under the plain name and once dated
keep if inrange(hour, 14, 17)

save "stata data/A1 Standard regression dataset", replace
save "stata data/A1 Standard regression dataset 1-22-2016", replace



// Reduced extracts of the dated regression dataset: selected columns only,
// exact duplicate rows removed.
local extract_vars sa total_kwh bin multiple_collapse_ok share_facility_big_meter ///
    many_meter_issue pdp_always_on sm_date optionally_enrolled

use `extract_vars' using "stata data/A1 Standard regression dataset 1-22-2016", clear
duplicates drop
save "stata data/with optionally enrolled", replace

// second extract: same rows without the optionally enrolled ones
keep if optionally_enrolled != 1
save "stata data/all time cleaned saids", replace
